location <- "nemo"
if (location == "jane") {
base_dir <- "/Volumes/lab-gandhis/home/users/wagena/periph_immune_neurodegen/"
} else if (location == "nemo") {
base_dir <- here::here()
options(bitmapType='cairo') # Add graphics device for nemo
}
# Set path
here::i_am("docs/02_analyse_coloc_results.Rmd")args <- list(
eqtl_raw_data_dir = file.path(base_dir, "raw_data",
"ebi_eqtl_catalogue"),
gwas_results_path = file.path(base_dir, "/nemo/lab/gandhis/home/users/wagena/references/GWAS/"),
AD_genes_file = str_c(base_dir, "/derived_data/Goate_neurobiodis2020_AD_genes_w_destrooper.csv"),
PD_genes_file = str_c(base_dir, "/derived_data/Blauendraat_lancetneuro2020_table1_PDgenes.csv"),
aquino_foo_coloc_path = file.path(base_dir, "processed_data",
"foo_aquino",
"coloc"),
aquino_eqtl_summary_path = file.path(base_dir, "raw_data",
"GWAS",
"aquino_2023_covid_eqtl/aquino_summary_table.csv"),
aquino_nalls_coloc_path = file.path(base_dir, "processed_data",
"nalls_aquino",
"coloc"),
aquino_rizig_coloc_path = file.path(base_dir, "processed_data",
"rizig_aquino",
"coloc"),
ota_foo_coloc_path = file.path(base_dir, "processed_data",
"ota_foo_complete",
"coloc"),
rizig_nedelec_quach_coloc_path = file.path(base_dir, "processed_data",
"rizig_nedelec_quach",
"coloc"),
aquino_jansen_coloc_path = file.path(base_dir, "processed_data",
"jansen_aquino",
"coloc"),
aquino_shigemizu_coloc_path = file.path(base_dir, "processed_data",
"shigemizu_aquino",
"coloc"),
eqtl_dir = file.path(base_dir, "raw_data/GWAS/aquino_2023_covid_eqtl"),
ota_foo_lrrk2_snca_results_path = file.path(base_dir, "processed_data/foo_jama_2020/coloc/ota_foo_lrrk2_snca_results.RDS"),
eqtl_sample_number = 222, # There are 222 individuals included in the aquino dataset
panel_app_genes_file = "~/AIediting/references/panel_app_PD_complex_park_gene_list.tsv",
nalls_mend_rand_file = "~/references/GWAS/GWAS_Nalls_2019_mendrand.csv",
nalls_tidy_varbeta = file.path(base_dir, "raw_data/GWAS/coloc_munged_data/PD2019_ex23andMe_tidy_varbeta.txt")
)Aim: This analysis explores whether expression of genes that cause neurodegenerative diseases (NDDs) in peripheral immune cells can mediate the risk of NDDs. This is done in an ancestry-specific manner, using genetic colocalisation.
Coloc uses a Bayesian approach to test the following hypotheses:
The prior probabilities for the Beyesian framework were specified as follows:
Interpretation of results:
This analysis utilises the wrapper functions within the colochelper package, written by David Zhang and Regina Reynolds.
This analysis uses as its baseline the multi-ancestry eQTL study from Aquino et al 2023, noting that it incorporates samples from African, East Asian and European donors, across multiple cell types and activation states.
They took blood from 222 donors (80 Central African, 80 West European, 62 East Asian) and exposed this to either COVID or Influenza before 10x single cell sequencing. They derived the following 5 cell types and 22 subtypes:
Each of these were sequenced untreated, or after exposure to Covid or influenza. eQTLs were derived at baseline, as well as response QTLs.
# Load the summary table that has been presaved. It has been generated using the code below:
eqtl_df <- read_csv(args$aquino_eqtl_summary_path)
aquino_cell_order <-c("Dendritic-classic",
"Dendritic-plasmacytoid",
"Monocyte",
"Mono-cd14",
"Mono-cd14-infected",
"Mono-cd16",
"NK",
"NK-CD56-bright",
"NK-CD56-dim",
"NK-memory-like",
"NK-innate-lymphoid",
"B-cell",
"B-memory-k",
"B-memory-l",
"B-naive-k",
"B-naive-l",
"Plasmablast",
"T-CD4",
"T-CD4-effector",
"T-CD4-naive",
"T-reg",
"T-CD8",
"T-CD8-central-effector-mem",
"T-CD8-effector-mem-reexpress-cd45RA",
"T-CD8-naive",
"T-gammadelta",
"T-mucosal-assoc-invariant"
)
Other datasets included in this study include:
GWASs in this study has been processed using the following script:
source(file.path(base_dir, "01_munge_GWASs_for_coloc.R"))The AD genes of interest were derived from Alison Goates Genetic architecture of AD in Neurobiology of disease (2020). The genes selected were those highlighted in red, indicated increased confidence of causation.
AD_gene_df <- read_delim(args$AD_genes_file) %>%
dplyr::filter(Confident_goate == "Y")
AD_genes <- tibble(disease = "AD",
gene = AD_gene_df$`Reported gene`)
AD_gene_list <- AD_genes$gene %>% unlist()
AD_gene_list## [1] "CR1" "PSEN2" "BIN1" "UNC5C" "TREM2" "SPI1" "SORL1" "PSEN1"
## [9] "ADAM10" "PLCG2" "ABI3" "ABCA7" "NOTCH3" "PLD3" "APOE" "CD33"
## [17] "PRNP" "APP"
The analysis was undertaken with the following script:
source(here::here("r_scripts/02a_run_coloc_gwas_immune_aquino_jansen.R")The results were then processed using the following script:
source(here::here("r_scripts",
"03a_process_jansen_aquino_coloc_results.R")all_results <- readRDS(str_c(args$aquino_jansen_coloc_path, "/coloc_summary.Rds"))
results <- all_results$liberal
results <- results %>%
separate(dataset,
into = c("Eqtl_type", "Cell_resolution", "Cell_name", "Treatment"),
sep = "_",
remove = F) %>%
dplyr::mutate(significant = case_when(PP.H4.abf >=0.75 ~ "Colocalised",
PP.H3.abf >= 0.75 ~ "Distinct",
sum_PPH3_PPH4 >0.5 &
ratio_PPH4_PPH3 > log2(9) &
PP.H4.abf < 0.75 ~ "Suggestive",
TRUE ~ "Not_signif"),
Treatment = as_factor(Treatment) %>% fct_relevel(c("NS", "IAV", "COV")),
Cell_name = as_factor(Cell_name) %>% fct_relevel(aquino_cell_order)
) %>%
dplyr::filter(gene_name %in% AD_gene_list)
write_csv(results,
file = file.path(base_dir, "derived_data/jansen_aquino_AD_gene_results.csv"))
NDD_gene_heatmap <- results %>%
dplyr::filter(significant != "Not_signif",
Eqtl_type == "expression") %>%
ggplot(aes(x = Treatment, y = fct_rev(Cell_name), fill = significant)) +
geom_tile() +
theme(panel.grid = element_blank(),
legend.position = "bottom") +
scale_fill_manual(values = my_palette) +
facet_grid(. ~gene_name, switch = "y") +
labs(fill = NULL,
y = "Cell type")
ggsave(NDD_gene_heatmap,
file = file.path(base_dir, "figures/Supp_Jansen_aquino_AD_genes.pdf"),
units = "cm", width = 17, height = 12)
ggsave(NDD_gene_heatmap,
file = file.path(base_dir, "figures/Supp_Jansen_aquino_AD_genes.png"),
units = "cm", width = 17, height = 12)
NDD_gene_heatmap +
labs(title = "Aquino with Jansen GWAS")Exploring p values for individual SNPs contributing to a locus allows confirmation of a true colocalisation - if the SNPs of highest significance of one trait are also of significance for another, it is likely that this is a true signal.
The beta comparison plots allows exploration re whether the direction of the significant signals align or not, based on the correlation of betas between one trait and another. In the eQTLs, the slope indicates the effect size of alternative alleles (not the ref allele found in the human genome reference) - this is concordant with the GWAS betas so directions of results can be compared directly.
Using the following bash code to generate the list of .rda files:
find processed_data/jansen_aquino/coloc/ -type f -name "*.rda" > processed_data/jansen_aquino/coloc/jansen_aquino_rda_list.txtThis code then compares the p-values and direction of effect of SNPs between the GWAS and eQTL:
genes_to_compare <- "BIN1"
eqtl_to_compare <- "expression_celltype_Monocyte_NS"
chromosome_lookup <- tibble(gene_name = c("BIN1"),
chromosome = c("chr2")
)
jansen_aquino_rda_list <- read_tsv(str_c(args$aquino_jansen_coloc_path, "/jansen_aquino_rda_list.txt"),
col_names = "file_path")
# Split file path into components: directory, file_name, eqtl, gwas_path, gwas, prior_tyoe, gene
results_dir <- jansen_aquino_rda_list %>%
dplyr::mutate(dir = file_path %>%
str_replace(., "/[^/]*$", "") %>%
str_replace(., "/[^/]*$", ""),
file_name = basename(file_path) %>%
str_replace(., ".rda", ""),
eqtl = basename(dir),
gwas_path = dir %>%
str_replace(., "/[^/]*$", ""),
gwas = basename(gwas_path),
prior_type = file_path %>%
str_replace(., "/[^/]*$", "") %>%
basename(),
gene = str_replace(file_name, ".+_(.*)", "\\1")
) %>%
dplyr::filter(prior_type == "liberal")
loci_to_explore <- results %>%
dplyr::filter(str_detect(Eqtl_type, "expression"),
gene_name %in% genes_to_compare, #Filter to match the results we are exploring
dataset %in% eqtl_to_compare
)
# The aquino eqtls are divided by chromosome, so I will join the correct path from the aquino eqtl summary to the corresponding coloc result. This will allow loading of the original eqtl path to allow filtering by the SNP identifier (rsNumber for snps involved in that coloc.
sig_coloc_files <- left_join(loci_to_explore,
results_dir,
by = c("gene_2" = "gene",
"dataset" = "eqtl")
) %>%
left_join(.,
chromosome_lookup, # Add in chromosomes of the different genes
by = "gene_name") %>%
left_join(.,
eqtl_df %>%
dplyr::select(eqtl_name, eqtl_chromosome, eqtl_path),
by = c("dataset" = "eqtl_name",
"chromosome" = "eqtl_chromosome"))jansen_coloc_loci_list <- list()
# Load the original gwas
gwas_path = args$jansen_tidy_varbeta
gwas <- fread(gwas_path)
for(i in 1:nrow(sig_coloc_files)){
gwas_name = "jansen"
eqtl_name = sig_coloc_files$dataset[i]
coloc_results_path = file.path(base_dir, sig_coloc_files$file_path[i])
eqtl_path = sig_coloc_files$eqtl_path[i]
eqtl_path <- sub(".*SNCA/", str_c(base_dir, "/"), eqtl_path) # Replace anything before and including 'SNCA/' with the new base directory
gene_name = sig_coloc_files$gene_name[i]
chromosome_number <- sig_coloc_files$chromosome[i]
coloc_significance = sig_coloc_files$significant[i]
coloc_locus = str_c(gwas_name, " - ", eqtl_name, " - ", gene_name)
coloc_label = str_c(gwas_name, " - ", eqtl_name, " - ", gene_name , " - ", coloc_significance)
print(str_c("Locus number is: ", i))
print(str_c("Locus is: ", coloc_label))
print(eqtl_name)
# Load the original eqtl
eqtl <- fread(eqtl_path)
eqtl %<>%
dplyr::mutate(eqtl_dataset = eqtl_name) %>%
dplyr::select(eqtl_dataset,
gene,
SNP = rsID,
beta = slope,
se = slope_se,
p.value = pvalue,
Al1 = ALT,
Al2 = REF)
# Load the coloc results for this loci - this command loads a data called 'coloc_results_annotated'
load(coloc_results_path)
# Select out SNPs to explore
SNPS_to_explore <- coloc_results_annotated$results$snp
gwas_snps <- gwas %>%
dplyr::filter(SNP %in% SNPS_to_explore)
eqtl_snps <- eqtl %>%
dplyr::filter(SNP %in% SNPS_to_explore) %>%
arrange(p.value) %>% # Arrange rows within each SNP group by p-value
distinct(SNP, .keep_all = TRUE) %>% # Keep the SNP entry with the lowest p value, as per the coloc analysis
ungroup() %>%
arrange(SNP)
message("Join eqtl with gwas and check direction")
# Join the gwas and eqtl datasets - this is dorn using the colochelpr function which raises warnings if the datasets are not aligned properly.
locus_gwas_qtl_overlap <- colochelpR::join_coloc_datasets(df1 = gwas_snps,
df2 = eqtl_snps,
harmonise = T) %>%
arrange(SNP)
data_to_plot <- locus_gwas_qtl_overlap %>%
dplyr::select(SNP, gene = gene_2, tissue = eqtl_dataset_2,
Al1_gwas = Al1_1, Al2_gwas = Al2_1, beta_gwas = beta_1, p_gwas = p.value_1,
Al1_eqtl = Al1_2, Al2_eqtl = Al2_2, beta_eqtl = beta_2, p_eqtl = p.value_2) %>%
dplyr::mutate(log_p_gwas = -log10(p_gwas),
log_p_eqtl = -log10(p_eqtl))
# Check if Allele 1 and 2 match between datasets, by adding a column which will be TRUE if this is the case
directionality_check <-
data_to_plot %>%
dplyr::mutate(matched_effect_and_alt_allele = case_when(Al1_gwas == Al1_eqtl & Al2_gwas == Al2_eqtl ~ TRUE,
TRUE ~ FALSE),
swapped_effect_and_alt_allele = case_when(matched_effect_and_alt_allele == FALSE & Al1_gwas == Al2_eqtl & Al2_gwas == Al1_eqtl ~ TRUE,
TRUE ~ FALSE))
directionality_check %>%
count(matched_effect_and_alt_allele,
swapped_effect_and_alt_allele)
SNPs_to_exclude <- directionality_check %>%
dplyr::filter(matched_effect_and_alt_allele == FALSE, swapped_effect_and_alt_allele== FALSE) %>%
.[["SNP"]]
SNPs_to_swap <- directionality_check %>%
dplyr::filter(matched_effect_and_alt_allele == FALSE, swapped_effect_and_alt_allele== TRUE) %>%
.[["SNP"]]
harmonised_alleles <-
data_to_plot %>%
dplyr::filter(!SNP %in% SNPs_to_exclude) %>%
dplyr::mutate(beta_2 = case_when(SNP %in% SNPs_to_swap ~ (-beta_eqtl),
TRUE ~ beta_eqtl),
Al1_eqtl = case_when(SNP %in% SNPs_to_swap ~ Al1_gwas,
TRUE ~ Al1_eqtl),
Al2_eqtl = case_when(SNP %in% SNPs_to_swap ~ Al2_gwas,
TRUE ~ Al2_eqtl))
harmonised_alleles <- harmonised_alleles %>%
dplyr::mutate(genome_wide_signif = case_when(p_gwas < 1e-5 | p_eqtl < 5e-8 ~ TRUE,
TRUE ~ FALSE) %>%
factor(., levels = c(TRUE, FALSE))) %>%
arrange(log_p_gwas)
print("Creating plots")
p_plot <- harmonised_alleles %>%
ggplot(aes(x = log_p_eqtl, y = log_p_gwas, colour = log_p_gwas)) +
geom_point(size = 0.7, alpha = 0.6) +
scale_colour_gradient(low = "grey80", high = "darkslategrey") +
ggpubr::stat_cor(method = "spearman", colour = "black", size = 4) +
theme_aw +
labs(x = "-log10(eQTL p-value)",
y = "-log10(GWAS p-value)",
colour = "-log10(GWAS p-value)",
subtitle = coloc_label)
beta_plot <- harmonised_alleles %>%
ggplot(aes(x = beta_eqtl, y = beta_gwas, colour = log_p_gwas)) +
geom_point(alpha = 0.6, size = 0.7) +
geom_smooth(formula = y ~ x, method = "lm", level = 0.99, colour = "black", size = 0.5, fill = "#4DA2B7") +
ggpubr::stat_cor(method = "spearman", colour = "black", size = 4) +
scale_colour_gradient(low = "grey80", high = "darkslategrey") +
theme_aw +
labs(x = "eQTL beta",
y = "GWAS beta",
colour = "-log10(GWAS p-value)",
subtitle = coloc_label)
comparison_locus_plot <- ((p_plot + beta_plot)) +
plot_layout(guides = "collect") +
plot_annotation(subtitle = str_c("Coloc results: ", coloc_label), tag_levels = "A") &
theme(legend.position = "bottom")
jansen_coloc_loci_list[[coloc_locus]] <- list(
locus_data = harmonised_alleles,
locus_plot = comparison_locus_plot)
print("Locus complete")
}
saveRDS(jansen_coloc_loci_list,
file = file.path(base_dir, "processed_data/jansen_aquino/coloc/loci_BIN1.RDS"))# Explore colocalised loci:
locus_p_value_limits <- c(0, 15)
jansen_coloc_loci_list <- readRDS(file.path(base_dir, "processed_data/jansen_aquino/coloc/loci_BIN1.RDS"))
loci_to_explore <- jansen_coloc_loci_list$`jansen - expression_celltype_Monocyte_NS - BIN1`$locus_data
loci_to_explore <- loci_to_explore %>%
arrange(log_p_gwas)
bin1_p_plot <- loci_to_explore %>%
ggplot(aes(x = log_p_eqtl, y = log_p_gwas, colour = genome_wide_signif)) +
geom_point(size = 1.2, alpha = 1, shape = 16) +
scale_colour_manual(values = my_palette) +
scale_fill_gradient(low = "grey80", high = "darkslategrey", limits = locus_p_value_limits) +
ggpubr::stat_cor(method = "pearson", colour = "black", size = 4) +
theme_aw +
labs(x = "Multi-ancestry eQTL \n-log10(p-value)",
y = "European AD GWAS \n-log10(p-value)",
colour = "Significant GWAS SNP")
bin1_beta_plot <- loci_to_explore %>%
dplyr::filter(beta_gwas < 1.5) %>%
ggplot(aes(x = beta_eqtl, y = beta_gwas, colour = genome_wide_signif)) +
geom_point(size = 1.2, alpha = 1, shape = 16) +
geom_smooth(formula = y ~ x, method = "lm", level = 0.95, colour = "black", size = 0.5, fill = "#4DA2B7") +
ggpubr::stat_cor(method = "pearson", colour = "black", size = 4) +
scale_colour_manual(values = my_palette) +
scale_fill_gradient(low = "grey80", high = "darkslategrey", limits = locus_p_value_limits) +
theme_aw +
labs(x = "Multi-ancestry eQTL \nbeta",
y = "European AD GWAS \nbeta",
colour = "Significant GWAS SNP")
bin1_jansen_aquino_locus_plot <- (bin1_p_plot + bin1_beta_plot) +
plot_layout(guides = "collect") &
theme(legend.position = "bottom")
bin1_jansen_aquino_locus_plotggsave(bin1_jansen_aquino_locus_plot,
file = file.path(base_dir, "figures/jansen_aquino_bin1_locus_plot.png"),
width = 18, height = 9, units = "cm")
ggsave(bin1_jansen_aquino_locus_plot,
file = file.path(base_dir, "figures/jansen_aquino_bin1_locus_plot.pdf"),
width = 18, height = 9, units = "cm")The analysis was undertaken with the following script:
source(here::here("r_scripts/02b_run_coloc_gwas_immune_aquino_shigemizu.R")The results were then processed using the following script:
source(here::here("r_scripts",
"03b_process_shigemizu_aquino_coloc_results.R") all_results <- readRDS(str_c(args$aquino_shigemizu_coloc_path, "/coloc_summary.Rds"))
results <- all_results$liberal
results <- results %>%
separate(dataset,
into = c("Eqtl_type", "Cell_resolution", "Cell_name", "Treatment"),
sep = "_",
remove = F) %>%
dplyr::mutate(significant = case_when(PP.H4.abf >=0.75 ~ "Colocalised",
PP.H3.abf >= 0.75 ~ "Distinct",
sum_PPH3_PPH4 >0.5 &
ratio_PPH4_PPH3 > log2(9) &
PP.H4.abf < 0.75 ~ "Suggestive",
TRUE ~ "Not_signif"),
Treatment = as_factor(Treatment) %>% fct_relevel(c("NS", "IAV", "COV")),
Cell_name = as_factor(Cell_name) %>% fct_relevel(aquino_cell_order)
) %>%
dplyr::filter(gene_name %in% AD_gene_list)
write_csv(results,
file = file.path(base_dir, "derived_data/shigemizu_aquino_AD_gene_results.csv"))
NDD_gene_heatmap <- results %>%
dplyr::filter(significant != "Not_signif",
Eqtl_type == "expression") %>%
ggplot(aes(x = Treatment, y = fct_rev(Cell_name), fill = significant)) +
geom_tile() +
theme(panel.grid = element_blank(),
legend.position = "bottom") +
scale_fill_manual(values = my_palette) +
facet_grid(. ~gene_name, switch = "y") +
labs(fill = NULL,
y = "Cell type")
ggsave(NDD_gene_heatmap,
file = file.path(base_dir, "figures/Supp_Shigemizu_aquino_AD_genes.pdf"),
units = "cm", width = 12, height = 16)
ggsave(NDD_gene_heatmap,
file = file.path(base_dir, "figures/Supp_Shigemizu_aquino_AD_genes.png"),
units = "cm", width = 12, height = 16)
NDD_gene_heatmap +
labs(subtitle = "East Asian AD GWAS (Shigemizu) and Multi-ancestry eQTL (Aquino)")PD_gene_df <- read_delim(args$PD_genes_file) %>%
dplyr::filter(Confidence_as_actual_PD_gene %in% c("Very high", "High"))
PD_genes <- tibble(disease = "PD",
gene = c(PD_gene_df$`Empty Cell`, "RAB32"))
PD_gene_list <- PD_genes$gene %>% unlist()
PD_gene_list ## [1] "SNCA" "PRKN" "PARK7" "LRRK2" "PINK1" "POLG" "ATP13A2"
## [8] "FBXO7" "GBA" "PLA2G6" "VPS35" "DNAJC6" "SYNJ1" "VPS13C"
## [15] "RAB32"
all_results <- readRDS(str_c(args$aquino_nalls_coloc_path, "/coloc_summary.Rds"))
figures_path <- args$aquino_nalls_coloc_pathThe analysis was undertaken with the following script:
source(here::here("r_scripts/02c_run_coloc_gwas_immune_aquino_nalls.R")The results were then processed using the following script:
source(here::here("r_scripts",
"03c_process_nalls_aquino_coloc_results.R")all_results <- readRDS(str_c(args$aquino_nalls_coloc_path, "/coloc_summary.Rds"))
results <- all_results$liberal
results <- results %>%
separate(dataset,
into = c("Eqtl_type", "Cell_resolution", "Cell_name", "Treatment"),
sep = "_",
remove = F) %>%
dplyr::mutate(significant = case_when(PP.H4.abf >=0.75 ~ "Colocalised",
PP.H3.abf >= 0.75 ~ "Distinct",
sum_PPH3_PPH4 >0.5 &
ratio_PPH4_PPH3 > log2(9) &
PP.H4.abf < 0.75 ~ "Suggestive",
TRUE ~ "Not_signif"),
Treatment = as_factor(Treatment) %>% fct_relevel(c("NS", "IAV", "COV")),
Cell_name = as_factor(Cell_name) %>% fct_relevel(aquino_cell_order)
) %>%
dplyr::filter(gene_name %in% PD_gene_list)
write_csv(results,
file = file.path(base_dir, "derived_data/nalls_aquino_PD_gene_results.csv"))
NDD_gene_heatmap <- results %>%
dplyr::filter(significant != "Not_signif",
Eqtl_type == "expression") %>%
ggplot(aes(x = Treatment, y = fct_rev(Cell_name), fill = significant)) +
geom_tile() +
theme(panel.grid = element_blank(),
legend.position = "bottom") +
scale_fill_manual(values = my_palette) +
facet_grid(. ~gene_name, switch = "y") +
labs(fill = NULL,
y = "Cell type")
ggsave(NDD_gene_heatmap,
file = file.path(base_dir, "figures/Supp_Nalls_aquino_PD_genes.pdf"),
units = "cm", width = 14, height = 16)
ggsave(NDD_gene_heatmap,
file = file.path(base_dir, "figures/Supp_Nalls_aquino_PD_genes.png"),
units = "cm", width = 14, height = 16)
NDD_gene_heatmap +
labs(subtitle = "West European GWAS (Nalls) and Multi ancestry eQTL (Aquino)")cells_to_compare <- c("Monocyte", "Mono-cd14", "Dendritic-plasmacytoid")
eqtl_to_compare <- c("expression_subtype_Dendritic-plasmacytoid_NS", "expression_celltype_Monocyte_NS", "expression_subtype_Mono-cd14_NS", "expression_celltype_Monocyte_COV")
genes_to_compare = c("LRRK2", "GBA", "SNCA")
Nalls_aquino_genes_to_compare_results <- results %>%
dplyr::filter(gene_name %in% genes_to_compare,
dataset %in% eqtl_to_compare,
Eqtl_type == "expression")# Functions
find_coloc_results_paths <- function(directory_to_search, pattern_to_search = ".rda") {
#' Function that takes a directory output by colochelpR and searches within that directory and sub-directories for all files matching a pattern. It will output a dataframe with columns for: directory, file_name, gwas, eqtl, prior_type and gene.
#' @param directory_to_search parent directory to search within
#' @param pattern a string pattern to search in the parent directory and subdirectories
#' @return dataframe with columns for: directory, file_name, dataset, prior_type, and gene.
results_dir <- tibble(file_path = list.files(directory_to_search, recursive = T, full.names = T, pattern = pattern_to_search)) %>%
dplyr::mutate(dir = file_path %>%
str_replace(., "/[^/]*$", "") %>%
str_replace(., "/[^/]*$", ""),
file_name = basename(file_path) %>%
str_replace(., ".rda", ""),
eqtl = basename(dir),
gwas_path = dir %>%
str_replace(., "/[^/]*$", ""),
gwas = basename(gwas_path),
prior_type = file_path %>%
str_replace(., "/[^/]*$", "") %>%
basename(),
gene = str_replace(file_name, ".+_(.*)", "\\1")
)
return(results_dir)
}
nalls_aquino_rda_list <- read_tsv(str_c(args$aquino_nalls_coloc_path, "/nalls_aquino_rda_list.txt"),
col_names = "file_path")
results_dir <- nalls_aquino_rda_list %>%
dplyr::mutate(dir = file_path %>%
str_replace(., "/[^/]*$", "") %>%
str_replace(., "/[^/]*$", ""),
file_name = basename(file_path) %>%
str_replace(., ".rda", ""),
eqtl = basename(dir),
gwas_path = dir %>%
str_replace(., "/[^/]*$", ""),
gwas = basename(gwas_path),
prior_type = file_path %>%
str_replace(., "/[^/]*$", "") %>%
basename(),
gene = str_replace(file_name, ".+_(.*)", "\\1")
) %>%
dplyr::filter(prior_type == "liberal")
loci_to_explore <- results %>%
dplyr::filter(str_detect(Eqtl_type, "expression"),
gene_name %in% genes_to_compare, #Filter to match the results we are exploring
dataset %in% eqtl_to_compare
)
chromosome_lookup <- tibble(gene_name = c("SNCA", "LRRK2", "GBA"),
chromosome = c("chr4", "chr12", "chr1")
)
# The aquino eqtls are divided by chromosome, so I will join the correct path from the aquino eqtl summary to the corresponding coloc result. This will allow loading of the original eqtl path to allow filtering by the SNP identifier (rsNumber for snps involved in that coloc.
sig_coloc_files <- left_join(loci_to_explore,
results_dir,
by = c("gene_2" = "gene",
"dataset" = "eqtl")
) %>%
left_join(.,
chromosome_lookup, # Add in chromosomes of the different genes
by = "gene_name") %>%
left_join(.,
eqtl_df %>%
dplyr::select(eqtl_name, eqtl_chromosome, eqtl_path),
by = c("dataset" = "eqtl_name",
"chromosome" = "eqtl_chromosome"))
sig_coloc_filesnalls_coloc_loci_list <- list()
# Load the original gwas
gwas_path = args$nalls_tidy_varbeta
gwas <- fread(gwas_path)
for(i in 1:nrow(sig_coloc_files)){
gwas_name = "nalls"
eqtl_name = sig_coloc_files$dataset[i]
coloc_results_path = file.path(base_dir, sig_coloc_files$file_path[i])
eqtl_path = sig_coloc_files$eqtl_path[i]
eqtl_path <- sub(".*SNCA/", str_c(base_dir, "/"), eqtl_path) # Replace anything before and including 'SNCA/' with the new base directory
gene_name = sig_coloc_files$gene_name[i]
chromosome_number <- sig_coloc_files$chromosome[i]
coloc_significance = sig_coloc_files$significant[i]
coloc_locus = str_c(gwas_name, " - ", eqtl_name, " - ", gene_name)
coloc_label = str_c(gwas_name, " - ", eqtl_name, " - ", gene_name , " - ", coloc_significance)
print(str_c("Locus number is: ", i))
print(str_c("Locus is: ", coloc_label))
print(eqtl_name)
# Load the original eqtl
eqtl <- fread(eqtl_path)
eqtl %<>%
dplyr::mutate(eqtl_dataset = eqtl_name) %>%
dplyr::select(eqtl_dataset,
gene,
SNP = rsID,
beta = slope,
se = slope_se,
p.value = pvalue,
Al1 = ALT,
Al2 = REF)
# Load the coloc results for this loci - this command loads a data called 'coloc_results_annotated'
load(coloc_results_path)
# Select out SNPs to explore
SNPS_to_explore <- coloc_results_annotated$results$snp
gwas_snps <- gwas %>%
dplyr::filter(SNP %in% SNPS_to_explore)
eqtl_snps <- eqtl %>%
dplyr::filter(SNP %in% SNPS_to_explore) %>%
arrange(p.value) %>% # Arrange rows within each SNP group by p-value
distinct(SNP, .keep_all = TRUE) %>% # Keep the SNP entry with the lowest p value, as per the coloc analysis
ungroup() %>%
arrange(SNP)
message("Join eqtl with gwas and check direction")
# Join the gwas and eqtl datasets - this is dorn using the colochelpr function which raises warnings if the datasets are not aligned properly.
locus_gwas_qtl_overlap <- colochelpR::join_coloc_datasets(df1 = gwas_snps,
df2 = eqtl_snps,
harmonise = T) %>%
arrange(SNP)
data_to_plot <- locus_gwas_qtl_overlap %>%
dplyr::select(SNP, gene = gene_2, tissue = eqtl_dataset_2,
Al1_gwas = Al1_1, Al2_gwas = Al2_1, beta_gwas = beta_1, p_gwas = p.value_1,
Al1_eqtl = Al1_2, Al2_eqtl = Al2_2, beta_eqtl = beta_2, p_eqtl = p.value_2) %>%
dplyr::mutate(log_p_gwas = -log10(p_gwas),
log_p_eqtl = -log10(p_eqtl))
# Check if Allele 1 and 2 match between datasets, by adding a column which will be TRUE if this is the case
directionality_check <-
data_to_plot %>%
dplyr::mutate(matched_effect_and_alt_allele = case_when(Al1_gwas == Al1_eqtl & Al2_gwas == Al2_eqtl ~ TRUE,
TRUE ~ FALSE),
swapped_effect_and_alt_allele = case_when(matched_effect_and_alt_allele == FALSE & Al1_gwas == Al2_eqtl & Al2_gwas == Al1_eqtl ~ TRUE,
TRUE ~ FALSE))
directionality_check %>%
count(matched_effect_and_alt_allele,
swapped_effect_and_alt_allele)
SNPs_to_exclude <- directionality_check %>%
dplyr::filter(matched_effect_and_alt_allele == FALSE, swapped_effect_and_alt_allele== FALSE) %>%
.[["SNP"]]
SNPs_to_swap <- directionality_check %>%
dplyr::filter(matched_effect_and_alt_allele == FALSE, swapped_effect_and_alt_allele== TRUE) %>%
.[["SNP"]]
harmonised_alleles <-
data_to_plot %>%
dplyr::filter(!SNP %in% SNPs_to_exclude) %>%
dplyr::mutate(beta_2 = case_when(SNP %in% SNPs_to_swap ~ (-beta_eqtl),
TRUE ~ beta_eqtl),
Al1_eqtl = case_when(SNP %in% SNPs_to_swap ~ Al1_gwas,
TRUE ~ Al1_eqtl),
Al2_eqtl = case_when(SNP %in% SNPs_to_swap ~ Al2_gwas,
TRUE ~ Al2_eqtl))
harmonised_alleles <- harmonised_alleles %>%
dplyr::mutate(genome_wide_signif = case_when(p_gwas < 1e-5 | p_eqtl < 5e-8 ~ TRUE,
TRUE ~ FALSE) %>%
factor(., levels = c(TRUE, FALSE))) %>%
arrange(log_p_gwas)
print("Creating plots")
p_plot <- harmonised_alleles %>%
ggplot(aes(x = log_p_eqtl, y = log_p_gwas, colour = log_p_gwas)) +
geom_point(size = 0.7, alpha = 0.6) +
scale_colour_gradient(low = "grey80", high = "darkslategrey") +
ggpubr::stat_cor(method = "pearson", colour = "black", size = 4) +
theme_aw +
labs(x = "-log10(eQTL p-value)",
y = "-log10(GWAS p-value)",
colour = "-log10(GWAS p-value)",
subtitle = coloc_label)
beta_plot <- harmonised_alleles %>%
ggplot(aes(x = beta_eqtl, y = beta_gwas, colour = log_p_gwas)) +
geom_point(alpha = 0.6, size = 0.7) +
geom_smooth(formula = y ~ x, method = "lm", level = 0.99, colour = "black", size = 0.5, fill = "#4DA2B7") +
ggpubr::stat_cor(method = "pearson", colour = "black", size = 4) +
scale_colour_gradient(low = "grey80", high = "darkslategrey") +
theme_aw +
labs(x = "eQTL beta",
y = "GWAS beta",
colour = "-log10(GWAS p-value)",
subtitle = coloc_label)
comparison_locus_plot <- ((p_plot + beta_plot)) +
plot_layout(guides = "collect") +
plot_annotation(subtitle = str_c("Coloc results: ", coloc_label), tag_levels = "A") &
theme(legend.position = "bottom")
nalls_coloc_loci_list[[coloc_locus]] <- list(
locus_data = harmonised_alleles,
locus_plot = comparison_locus_plot)
print("Locus complete")
}
saveRDS(nalls_coloc_loci_list,
file = file.path(base_dir, "processed_data/nalls_aquino/coloc/loci_SNCA_LRRK2_GBA.RDS"))# Explore colocalised loci:
locus_p_value_limits <- c(0, 15)
nalls_coloc_loci_list <- readRDS(file.path(base_dir, "processed_data/nalls_aquino/coloc/loci_SNCA_LRRK2_GBA.RDS"))
loci_to_explore <- nalls_coloc_loci_list$`nalls - expression_celltype_Monocyte_COV - LRRK2`$locus_data
loci_to_explore <- loci_to_explore %>%
arrange(log_p_gwas)
lrrk2_p_plot <- loci_to_explore %>%
ggplot(aes(x = log_p_eqtl, y = log_p_gwas, colour = genome_wide_signif)) +
geom_point(size = 1.2, alpha = 1, shape = 16) +
scale_colour_manual(values = my_palette) +
scale_fill_gradient(low = "grey80", high = "darkslategrey", limits = locus_p_value_limits) +
ggpubr::stat_cor(method = "pearson", colour = "black", size = 4) +
theme_aw +
labs(x = "Multi-ancestry eQTL \n-log10(p-value)",
y = "European GWAS \n-log10(p-value)",
colour = "Significant GWAS SNP")
lrrk2_beta_plot <- loci_to_explore %>%
dplyr::filter(beta_gwas < 1.5) %>%
ggplot(aes(x = beta_eqtl, y = beta_gwas, colour = genome_wide_signif)) +
geom_point(size = 1.2, alpha = 1, shape = 16) +
geom_smooth(formula = y ~ x, method = "lm", level = 0.95, colour = "black", size = 0.5, fill = "#4DA2B7") +
ggpubr::stat_cor(method = "pearson", colour = "black", size = 4) +
scale_colour_manual(values = my_palette) +
scale_fill_gradient(low = "grey80", high = "darkslategrey", limits = locus_p_value_limits) +
theme_aw +
labs(x = "Multi-ancestry eQTL \nbeta",
y = "European GWAS \nbeta",
colour = "Significant GWAS SNP")
lrrk2_nalls_aquino_locus_plot <- (lrrk2_p_plot + lrrk2_beta_plot) +
plot_layout(guides = "collect") &
theme(legend.position = "bottom")
lrrk2_nalls_aquino_locus_plotggsave(lrrk2_nalls_aquino_locus_plot,
file = file.path(base_dir, "figures/nalls_aquino_lrrk2_locus_plot_no_outliers.png"),
width = 18, height = 10, units = "cm")
ggsave(lrrk2_nalls_aquino_locus_plot,
file = file.path(base_dir, "figures/nalls_aquino_lrrk2_locus_plot_no_outliers.pdf"),
width = 18, height = 10, units = "cm")The analysis was undertaken with the following script:
source(here::here("r_scripts/02d_run_coloc_gwas_immune_aquino_foo.R")The results were then processed using the following script:
source(here::here("r_scripts",
"03d_process_foo_aquino_coloc_results.R")all_results <- readRDS(str_c(args$aquino_foo_coloc_path, "/coloc_summary.Rds"))
results <- all_results$liberal
results <- results %>%
separate(dataset,
into = c("Eqtl_type", "Cell_resolution", "Cell_name", "Treatment"),
sep = "_",
remove = F) %>%
dplyr::mutate(significant = case_when(PP.H4.abf >=0.75 ~ "Colocalised",
PP.H3.abf >= 0.75 ~ "Distinct",
sum_PPH3_PPH4 >0.5 &
ratio_PPH4_PPH3 > log2(9) &
PP.H4.abf < 0.75 ~ "Suggestive",
TRUE ~ "Not_signif"),
Treatment = as_factor(Treatment) %>% fct_relevel(c("NS", "IAV", "COV")),
Cell_name = as_factor(Cell_name) %>% fct_relevel(aquino_cell_order)
) %>%
dplyr::filter(gene_name %in% PD_gene_list)
write_csv(results,
file = file.path(base_dir, "derived_data/foo_aquino_PD_gene_results.csv"))
NDD_gene_heatmap <- results %>%
dplyr::filter(significant != "Not_signif",
Eqtl_type == "expression") %>%
ggplot(aes(x = Treatment, y = fct_rev(Cell_name), fill = significant)) +
geom_tile() +
theme(panel.grid = element_blank(),
legend.position = "bottom") +
scale_fill_manual(values = my_palette) +
facet_grid(. ~gene_name, switch = "y") +
labs(fill = NULL,
y = "Cell type")
ggsave(NDD_gene_heatmap,
file = file.path(base_dir, "figures/Supp_Foo_aquino_PD_genes.pdf"),
units = "cm", width = 12, height = 16)
ggsave(NDD_gene_heatmap,
file = file.path(base_dir, "figures/Supp_Foo_aquino_PD_genes.png"),
units = "cm", width = 12, height = 16)
NDD_gene_heatmap +
labs(subtitle = "East Asian PD GWAS (Foo) with multi-ancestry eQTL (Aquino)")foo_aquino_genes_to_compare_results <- results %>%
dplyr::filter(gene_name %in% genes_to_compare,
Cell_name %in% cells_to_compare,
Eqtl_type == "expression").
The analysis was undertaken with the following script:
source(here::here("r_scripts/02e_run_coloc_gwas_immune_asian_complete.R")The results were then processed using the following script:
source(here::here("r_scripts",
"03e_process_ota_immune_coloc_results_foo_complete.R")all_results <- readRDS(str_c(args$ota_foo_coloc_path, "/coloc_summary.Rds"))
ota_cell_order <- c("mDC",
"pDC",
"CL_Mono",
"NC_Mono",
"CD16p_Mono",
"Int_Mono",
"Neu",
"LDG",
"NK",
"Naïve_CD4",
"Th1",
"Th2",
"Th17",
"Tfh",
"Fr_I_nTreg",
"Fr_II_eTreg",
"Fr_III_T",
"Naïve_CD8",
"CM_CD8",
"EM_CD8",
"TEMRA_CD8",
"Naïve_B",
"USM_B",
"SM_B",
"DN_B",
"Plasmablast"
)
results <- all_results$liberal
results <- results %>%
dplyr::mutate(significant = case_when(PP.H4.abf >=0.75 ~ "Colocalised",
PP.H3.abf >= 0.75 ~ "Distinct",
sum_PPH3_PPH4 >0.5 &
ratio_PPH4_PPH3 > log2(9) &
PP.H4.abf < 0.75 ~ "Suggestive",
TRUE ~ "Not_signif"),
dataset = as_factor(dataset) %>% fct_expand(ota_cell_order) %>% fct_relevel(ota_cell_order)
) %>%
dplyr::filter(gene_name %in% PD_gene_list)
write_csv(results,
file = file.path(base_dir, "derived_data/foo_ota_PD_gene_results.csv"))
NDD_gene_heatmap <- results %>%
dplyr::filter(significant != "Not_signif") %>%
ggplot(aes(x = gene_name, y = fct_rev(dataset), fill = significant)) +
geom_tile() +
theme(panel.grid = element_blank(),
legend.position = "bottom") +
scale_fill_manual(values = my_palette) +
# facet_grid(. ~gene_name, switch = "y") +
labs(fill = NULL,
y = "Cell type",
x = "Gene")
ggsave(NDD_gene_heatmap,
file = file.path(base_dir, "figures/Supp_Foo_ota_PD_genes.pdf"),
units = "cm", width = 12, height = 16)
ggsave(NDD_gene_heatmap,
file = file.path(base_dir, "figures/Supp_Foo_ota_PD_genes.png"),
units = "cm", width = 12, height = 16)
NDD_gene_heatmap +
labs(subtitle = "East Asian GWAS (Foo) and East Asian eQTL (Ota)")ota_foo_lrrk2_snca_results <- results %>%
dplyr::filter(dataset %in% c( "pDC", "mDC", "CL_Mono", "CD16p_Mono")) %>%
dplyr::mutate(eqtl_name = "Ota 2021",
dataset = fct_relevel(dataset, c( "pDC", "mDC", "CL_Mono", "CD16p_Mono")))
ota_foo_barchart <- ota_foo_lrrk2_snca_results %>%
dplyr::select(GWAS_1, eqtl_name, gene_name, dataset, PPH3 = PP.H3.abf, PPH4 = PP.H4.abf) %>%
pivot_longer(c(PPH3, PPH4), names_to = "Hypothesis", values_to = "Posterior_value" ) %>%
ggplot(aes(x = dataset, y = Posterior_value, fill = Hypothesis)) +
geom_bar(stat = "identity", position = "dodge") +
scale_fill_manual(values = my_palette) +
scale_x_discrete(labels = plot_labels) +
geom_hline(aes(yintercept = 0.75), colour = "grey10", linetype = "dashed") +
facet_nested(gene_name ~ eqtl_name + GWAS_1,
labeller = plot_labeller) +
theme_aw +
theme(legend.position = "bottom",
strip.background.x = element_rect(color="black", linewidth=1, linetype="solid")) +
labs(x = "Cell type", y = "Coloc posterior value")
ota_foo_barchart# Function
find_coloc_results_paths <- function(directory_to_search, pattern_to_search = ".rda") {
#' Function that takes a directory output by colochelpR and searches within that directory and sub-directories for all files matching a pattern. It will output a dataframe with columns for: directory, file_name, gwas, eqtl, prior_type and gene.
#' @param directory_to_search parent directory to search within
#' @param pattern a string pattern to search in the parent directory and subdirectories
#' @return dataframe with columns for: directory, file_name, dataset, prior_type, and gene.
results_dir <- tibble(file_path = list.files(directory_to_search, recursive = T, full.names = T, pattern = pattern_to_search)) %>%
dplyr::mutate(dir = file_path %>%
str_replace(., "/[^/]*$", "") %>%
str_replace(., "/[^/]*$", ""),
file_name = basename(file_path) %>%
str_replace(., ".rda", ""),
eqtl = basename(dir),
gwas_path = dir %>%
str_replace(., "/[^/]*$", ""),
gwas = basename(gwas_path),
prior_type = file_path %>%
str_replace(., "/[^/]*$", "") %>%
basename(),
gene = str_replace(file_name, ".+_(.*)", "\\1")
)
return(results_dir)
}
# Filter significant results:
coloc_PPH4 <- results %>%
dplyr::filter(PP.H4.abf >= 0.75) %>%
dplyr::rename(hgnc_symbol = gene_name)
results_dir <- find_coloc_results_paths(directory_to_search = args$ota_foo_coloc_path, pattern_to_search = ".rda")
coloc_files <- results_dir %>%
dplyr::filter(prior_type == "liberal") %>% #Filter to match the results we are exploring
dplyr::inner_join(.,
coloc_PPH4,
c("gene" = "gene_2",
"eqtl" = "dataset")) %>%
dplyr::arrange(hgnc_symbol) %>%
dplyr::rename("dataset" = "eqtl")# Generate p value and beta comparison plots
gwas <- fread(args$foo_tidy_gwas_path)
coloc_list <- list()
for(i in 1:nrow(coloc_files)){
dataset = coloc_files$dataset[i]
gene_name = coloc_files$hgnc_symbol[i]
coloc_locus = str_c(dataset, "-", gene_name)
print(str_c("Locus number is: ", i))
print(str_c("Locus is: ", coloc_locus))
# Load the original eqtl
eqtl_path = str_c(args$eqtl_raw_data_dir, "/", dataset, "_nominal.txt")
eqtl <- fread(eqtl_path)
print("Filtering eqtl")
eqtl_tidy_gene_filtered <-
eqtl %>% # Tidy eqtl as per when coloc is run, but without including mafs or varbeta as these are not required.
dplyr::filter(Gene_name == gene_name) %>% # Find only the significant genes in the eqtl of interest
dplyr::filter(nchar(REF) == 1, # Remove the SNPs in the eqtl that are insertions/deletions (ie not true SNPs)
nchar(ALT) == 1) %>%
dplyr::mutate(eqtl_dataset = dataset,
CHR = str_replace(CHR, "chr", ""), # Convert to ensembl chr format
SNP = str_c(CHR, "_", Variant_position_start),
beta = `slope(ALT)`) %>%
dplyr::select(eqtl_dataset,
gene = Gene_id,
SNP,
CHR,
pos = Variant_position_start,
beta,
p.value = nominal_P_value,
Al1 = ALT,
Al2 = REF,
Gene_name) %>%
dplyr::filter(beta != 0) %>%
colochelpR::check_coloc_data_format(.,
beta_or_pval = "pval",
check_maf = F) %>%
dplyr::distinct(SNP, .keep_all = T)
print("Join eqtl with gwas and check direction")
locus_gwas_qtl_overlap <- colochelpR::join_coloc_datasets(df1 = gwas, # Join the gwas and eqtl datasets - this is down using the colochelpr function which raises warnings if the datasets are not aligned properly.
df2 = eqtl_tidy_gene_filtered,
harmonise = F)
data_to_plot <- locus_gwas_qtl_overlap %>%
dplyr::select(SNP, gene = Gene_name_2, tissue = eqtl_dataset_2,
Al1_gwas = Al1_1, Al2_gwas = Al2_1, beta_gwas = beta_1, p_gwas = p.value_1,
Al1_eqtl = Al1_2, Al2_eqtl = Al2_2, beta_eqtl = beta_2, p_eqtl = p.value_2) %>%
tidyr::separate(col = "SNP", into = c("chr", "pos"), sep = "_", remove = F) %>%
dplyr::mutate(pos_MB = as.numeric(pos)/1000000,
log_p_gwas = -log10(p_gwas),
log_p_eqtl = -log10(p_eqtl))
# Check if Allele 1 and 2 match between datasets, by adding a column which will be TRUE if this is the case
directionality_check <-
data_to_plot %>%
dplyr::mutate(matched_effect_and_alt_allele = case_when(Al1_gwas == Al1_eqtl & Al2_gwas == Al2_eqtl ~ TRUE,
TRUE ~ FALSE),
swapped_effect_and_alt_allele = case_when(matched_effect_and_alt_allele == FALSE & Al1_gwas == Al2_eqtl & Al2_gwas == Al1_eqtl ~ TRUE,
TRUE ~ FALSE))
directionality_check %>%
count(matched_effect_and_alt_allele)
directionality_check %>%
count(swapped_effect_and_alt_allele)
SNPs_to_exclude <- directionality_check %>%
dplyr::filter(matched_effect_and_alt_allele == FALSE, swapped_effect_and_alt_allele== FALSE) %>%
.[["SNP"]]
SNPs_to_swap <- directionality_check %>%
dplyr::filter(matched_effect_and_alt_allele == FALSE, swapped_effect_and_alt_allele== TRUE) %>%
.[["SNP"]]
harmonised_alleles <-
data_to_plot %>%
dplyr::filter(!SNP %in% SNPs_to_exclude) %>%
dplyr::mutate(beta_2 = case_when(SNP %in% SNPs_to_swap ~ (-beta_eqtl),
TRUE ~ beta_eqtl),
Al1_eqtl = case_when(SNP %in% SNPs_to_swap ~ Al1_gwas,
TRUE ~ Al1_eqtl),
Al2_eqtl = case_when(SNP %in% SNPs_to_swap ~ Al2_gwas,
TRUE ~ Al2_eqtl))
harmonised_alleles <- harmonised_alleles %>%
dplyr::mutate(genome_wide_signif = case_when(p_gwas < 1e-5 | p_eqtl < 5e-8 ~ TRUE,
TRUE ~ FALSE))
print("Creating plots")
p_plot <- harmonised_alleles %>%
ggplot(aes(x = log_p_eqtl, y = log_p_gwas, colour = genome_wide_signif)) +
geom_point(size = 0.8, alpha = 0.3) +
scale_colour_manual(values = c("#888888", "#3B9AB2")) +
geom_hline(yintercept = -log10(5*10^-8), linetype = "dashed") +
geom_hline(yintercept = -log10(1e-5), linetype = "dashed", colour = "grey50") +
geom_vline(xintercept = -log10(5*10^-8), linetype = "dashed") +
ggpubr::stat_cor(method = "pearson", cor.coef.name = "rho", colour = "black", size = 4) +
labs(x = "-log10(eQTL p-value)",
y = "-log10(GWAS p-value)",
colour = "Significant in GWAS or eQTL")
beta_plot <- harmonised_alleles %>%
ggplot(aes(x = beta_eqtl, y = beta_gwas, colour = genome_wide_signif)) +
geom_point(alpha = 0.3, size = 0.8) +
geom_smooth(formula = y ~ x, method = "lm", level = 0.99, colour = "black", size = 0.5, fill = "#4DA2B7") +
ggpubr::stat_cor(method = "pearson", cor.coef.name = "rho", colour = "black", size = 4) +
scale_colour_manual(values = c("#888888", "#3B9AB2")) +
labs(x = str_c("eQTL Beta - ", dataset),
y = "GWAS beta",
colour = "Significant in GWAS or eQTL")
locus_plot <- (p_plot + beta_plot) +
plot_layout(guides = "collect") +
plot_annotation(title = str_c("Coloc results: ", dataset, " - ", gene_name))
print("Saving data")
ggsave(locus_plot,
file = str_c(figures_path, "/summary_figures/", coloc_locus, ".png"),
device = "png",
width = 22, height = 12, units = "cm")
coloc_list[[coloc_locus]] <- list(
locus_data = harmonised_alleles,
locus_plot = locus_plot)
print("Locus complete")
}
saveRDS(coloc_list,
file = file.path(base_dir, "processed_data/ota_foo_complete/coloc/p_beta_comparisons.Rds"))# Load pre-run coloc summary list
coloc_list <- readRDS(file = file.path(base_dir, "processed_data/ota_foo_complete/coloc/p_beta_comparisons.Rds"))
loci_to_explore <- coloc_list$`CD16p_Mono-SNCA`$locus_data
loci_to_explore <- loci_to_explore %>%
arrange(log_p_gwas) %>%
mutate(genome_wide_signif = fct_relevel(as.factor(genome_wide_signif), c("TRUE", "FALSE")))
snca_p_plot <- loci_to_explore %>%
ggplot(aes(x = log_p_eqtl, y = log_p_gwas, colour = genome_wide_signif)) +
geom_point(size = 1.2, alpha = 1, shape = 16) +
scale_colour_manual(values = my_palette) +
scale_fill_gradient(low = "grey85", high = "darkslategrey", limits = locus_p_value_limits) +
ggpubr::stat_cor(method = "pearson", colour = "black", size = 4) +
theme_aw +
labs(x = "-log10(Asian eQTL p-value)",
y = "-log10(Asain GWAS p-value)",
colour = "Significant GWAS p-value")
snca_beta_plot <- loci_to_explore %>%
ggplot(aes(x = beta_eqtl, y = beta_gwas, colour = genome_wide_signif)) +
geom_point(alpha = 1, size = 1.2, shape = 16) +
geom_smooth(formula = y ~ x, method = "lm", level = 0.99, colour = "black", size = 0.5, fill = "#4DA2B7") +
ggpubr::stat_cor(method = "pearson", colour = "black", size = 4) +
scale_colour_manual(values = my_palette) +
scale_fill_gradient(low = "grey85", high = "darkslategrey", limits = locus_p_value_limits) +
theme_aw +
labs(x = "Asian eQTL beta",
y = "Asian GWAS beta",
colour = "Significant GWAS p-value")
snca_foo_ota_locus_plot <- (snca_p_plot + snca_beta_plot) +
plot_layout(guides = "collect") &
theme(legend.position = "bottom")
snca_foo_ota_locus_plotloci_to_explore <- coloc_list$`mDC-SNCA`$locus_data
loci_to_explore <- loci_to_explore %>%
arrange(log_p_gwas) %>%
mutate(genome_wide_signif = fct_relevel(as.factor(genome_wide_signif), c("TRUE", "FALSE")))
snca_p_plot <- loci_to_explore %>%
ggplot(aes(x = log_p_eqtl, y = log_p_gwas, colour = genome_wide_signif)) +
geom_point(size = 1.2, alpha = 1, shape = 16) +
scale_colour_manual(values = my_palette) +
scale_fill_gradient(low = "grey85", high = "darkslategrey", limits = locus_p_value_limits) +
ggpubr::stat_cor(method = "pearson", colour = "black", size = 4) +
theme_aw +
labs(x = "-log10(Asian eQTL p-value)",
y = "-log10(Asain GWAS p-value)",
colour = "Significant GWAS p-value")
snca_beta_plot <- loci_to_explore %>%
ggplot(aes(x = beta_eqtl, y = beta_gwas, colour = genome_wide_signif)) +
geom_point(alpha = 1, size = 1.2, shape = 16) +
geom_smooth(formula = y ~ x, method = "lm", level = 0.99, colour = "black", size = 0.5, fill = "#4DA2B7") +
ggpubr::stat_cor(method = "pearson", colour = "black", size = 4) +
scale_colour_manual(values = my_palette) +
scale_fill_gradient(low = "grey85", high = "darkslategrey", limits = locus_p_value_limits) +
theme_aw +
labs(x = "Asian eQTL beta",
y = "Asian GWAS beta",
colour = "Significant GWAS p-value")
snca_foo_ota_locus_plot <- (snca_p_plot + snca_beta_plot) +
plot_layout(guides = "collect") &
theme(legend.position = "bottom")
snca_foo_ota_locus_plotThe analysis was undertaken with the following script:
source(here::here("r_scripts/02f_run_coloc_gwas_immune_aquino_rizig.R")The results were then processed using the following script:
source(here::here("r_scripts",
"03f_process_rizig_aquino_coloc_results.R")all_results <- readRDS(str_c(args$aquino_rizig_coloc_path, "/coloc_summary.Rds"))
results <- all_results$liberal
results <- results %>%
separate(dataset,
into = c("Eqtl_type", "Cell_resolution", "Cell_name", "Treatment"),
sep = "_",
remove = F) %>%
dplyr::mutate(significant = case_when(PP.H4.abf >=0.75 ~ "Colocalised",
PP.H3.abf >= 0.75 ~ "Distinct",
sum_PPH3_PPH4 >0.5 &
ratio_PPH4_PPH3 > log2(9) &
PP.H4.abf < 0.75 ~ "Suggestive",
TRUE ~ "Not_signif"),
Treatment = as_factor(Treatment) %>% fct_relevel(c("NS", "IAV", "COV")),
Cell_name = as_factor(Cell_name) %>% fct_relevel(aquino_cell_order)
) %>%
dplyr::filter(gene_name %in% PD_gene_list)
write_csv(results,
file = file.path(base_dir, "derived_data/rizig_aquino_PD_gene_results.csv"))
NDD_gene_heatmap <- results %>%
dplyr::filter(significant != "Not_signif",
Eqtl_type == "expression") %>%
ggplot(aes(x = Treatment, y = fct_rev(Cell_name), fill = significant)) +
geom_tile() +
theme(panel.grid = element_blank(),
legend.position = "bottom") +
scale_fill_manual(values = my_palette) +
facet_grid(. ~gene_name, switch = "y") +
labs(fill = NULL,
y = "Cell type")
ggsave(NDD_gene_heatmap,
file = file.path(base_dir, "figures/Supp_Rizig_aquino_PD_genes.pdf"),
units = "cm", width = 12, height = 16)
ggsave(NDD_gene_heatmap,
file = file.path(base_dir, "figures/Supp_Rizig_aquino_PD_genes.png"),
units = "cm", width = 12, height = 16)
NDD_gene_heatmap +
labs(subtitle = "African PD GWAS (Rizig) with Multi-ancestry eQTL (Aquino)")The analysis was undertaken with the following script:
source(file.path(base_dir, "r_scripts/02g_run_coloc_gwas_immune_african.R"))
source(file.path(base_dir, "r_scripts/02g_ii_run_coloc_gwas_immune_african.R"))The results were then processed using the following script:
source(file.path(base_dir, "r_scripts",
"process_rizig_immune_coloc")all_results <- readRDS(str_c(args$rizig_nedelec_quach_coloc_path, "/coloc_summary.Rds"))
results <- all_results$liberal
results <- results %>%
dplyr::mutate(significant = case_when(PP.H4.abf >=0.75 ~ "Colocalised",
PP.H3.abf >= 0.75 ~ "Distinct",
sum_PPH3_PPH4 >0.5 &
ratio_PPH4_PPH3 > log2(9) &
PP.H4.abf < 0.75 ~ "Suggestive",
TRUE ~ "Not_signif"),
) %>%
dplyr::filter(gene_name %in% PD_gene_list)
write_csv(results,
file = file.path(base_dir, "derived_data/rizig_nedelecQuach_PD_gene_results.csv"))There are no colocalised, distinct or suggestive results.
ancestry_results <- bind_rows(Nalls_aquino_genes_to_compare_results, foo_aquino_genes_to_compare_results) %>%
filter((Treatment == "NS" & Cell_name %in% c("Monocyte", "Mono-cd14", "Dendritic-plasmacytoid")) |
(Treatment == "COV" & Cell_name == "Monocyte")) %>%
dplyr::mutate(cell_treatment = str_c(Cell_name, "_", Treatment) %>%
as_factor() %>%
fct_relevel(., c("Dendritic-plasmacytoid_NS", "Monocyte_NS", "Mono-cd14_NS" ,"Monocyte_COV")),
GWAS_1 = as_factor(GWAS_1) %>% fct_relevel(., c("PD2019_ex23andMe", "foo2020", "rizig2023")),
gene_name = fct_relevel(gene_name, c("LRRK2", "SNCA", "GBA")),
eqtl_name = "Aquino 2023"
) %>%
dplyr::filter(gene_name != 'GBA')
aquino_barchart <- ancestry_results %>%
dplyr::select(GWAS_1, Eqtl_type, gene_name, Cell_resolution, Cell_name, cell_treatment, Treatment, PPH3 = PP.H3.abf, PPH4 = PP.H4.abf, eqtl_name) %>%
pivot_longer(c(PPH3, PPH4), names_to = "Hypothesis", values_to = "Posterior_value" ) %>%
ggplot(aes(x = cell_treatment, y = Posterior_value, fill = Hypothesis)) +
geom_bar(stat = "identity", position = "dodge") +
scale_fill_manual(values = my_palette) +
scale_x_discrete(labels = plot_labels) +
geom_hline(aes(yintercept = 0.75), colour = "grey10", linetype = "dashed") +
facet_nested(gene_name ~ eqtl_name + GWAS_1,
labeller = plot_labeller) +
theme_aw +
theme(legend.position = "bottom",
strip.background.x = element_rect(color="black", linewidth=1, linetype="solid")) +
labs(x = "Cell type", y = "Coloc posterior value")
aquino_barchartfigure_2 <- ((aquino_barchart + labs(x = NULL) | ota_foo_barchart + labs(y = NULL, x = NULL)) +
plot_layout(widths = c(2,1),
guides = "collect")) /
(( lrrk2_p_plot + labs(x = "-log10(p value)", y = "European GWAS \nMulti-ancestry eQTL \n\n-log10(p value)") |
lrrk2_beta_plot + labs(x = "beta", y = "beta") ) /
(snca_p_plot + labs(x = "-log10(p value)", y = "East Asian GWAS \nEast Asian eQTL \n\n-log10(p value)") |
snca_beta_plot + labs(x = "beta", y = "beta")) +
plot_layout(guides = "collect")) +
plot_layout(heights = c(4,7)) +
plot_annotation(tag_levels = "a") &
theme(legend.position = "bottom")
figure_2ggsave(figure_2, height = 30, width = 22, units = "cm",
file = file.path(base_dir, "figures/figure_2.pdf"))
ggsave(figure_2, height = 30, width = 22, units = "cm",
file = file.path(base_dir, "figures/figure_2.png"))sessionInfo()## R version 4.2.0 (2022-04-22)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Rocky Linux 8.7 (Green Obsidian)
##
## Matrix products: default
## BLAS/LAPACK: /flask/apps/eb/software/FlexiBLAS/3.0.4-GCC-11.2.0/lib64/libflexiblas.so.3.0
##
## locale:
## [1] LC_CTYPE=en_GB.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_GB.UTF-8 LC_COLLATE=en_GB.UTF-8
## [5] LC_MONETARY=en_GB.UTF-8 LC_MESSAGES=en_GB.UTF-8
## [7] LC_PAPER=en_GB.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] gt_0.11.0 ggpubr_0.6.0 ggh4x_0.2.8
## [4] ggrepel_0.9.5 wesanderson_0.3.6 paletteer_1.4.1
## [7] sciRmdTheme_0.1 viridis_0.6.2 viridisLite_0.4.1
## [10] patchwork_1.2.0 DBI_1.2.2 pheatmap_1.0.12
## [13] LDlinkR_1.4.0 qdapTools_1.3.5 biomaRt_2.52.0
## [16] tidytable_0.11.0 plyranges_1.18.0 GenomicRanges_1.50.2
## [19] GenomeInfoDb_1.32.3 IRanges_2.32.0 S4Vectors_0.36.2
## [22] BiocGenerics_0.42.0 data.table_1.14.4 colochelpR_0.99.2
## [25] coloc_5.2.3 forcats_1.0.0 stringr_1.5.1
## [28] dplyr_1.1.0 purrr_1.0.1 readr_2.1.5
## [31] tidyr_1.3.1 tibble_3.2.0 ggplot2_3.4.2
## [34] tidyverse_1.3.2 here_1.0.1
##
## loaded via a namespace (and not attached):
## [1] readxl_1.4.1 backports_1.4.1
## [3] BiocFileCache_2.6.1 plyr_1.8.9
## [5] splines_4.2.0 BiocParallel_1.32.6
## [7] digest_0.6.35 htmltools_0.5.7
## [9] fansi_1.0.3 magrittr_2.0.3
## [11] memoise_2.0.1 googlesheets4_1.1.1
## [13] tzdb_0.4.0 Biostrings_2.66.0
## [15] modelr_0.1.11 matrixStats_1.3.0
## [17] vroom_1.6.5 prettyunits_1.1.1
## [19] colorspace_2.1-0 blob_1.2.4
## [21] rvest_1.0.4 rappdirs_0.3.3
## [23] haven_2.5.4 xfun_0.34
## [25] crayon_1.5.2 RCurl_1.98-1.14
## [27] jsonlite_1.8.8 glue_1.6.2
## [29] gtable_0.3.1 gargle_1.5.0
## [31] zlibbioc_1.44.0 XVector_0.38.0
## [33] DelayedArray_0.22.0 car_3.1-2
## [35] abind_1.4-5 scales_1.3.0
## [37] rstatix_0.7.2 Rcpp_1.0.12
## [39] progress_1.2.2 bit_4.0.5
## [41] httr_1.4.7 RColorBrewer_1.1-3
## [43] ellipsis_0.3.2 farver_2.1.1
## [45] pkgconfig_2.0.3 reshape_0.8.9
## [47] XML_3.99-0.16.1 sass_0.4.1
## [49] dbplyr_2.2.1 utf8_1.2.2
## [51] labeling_0.4.3 tidyselect_1.2.1
## [53] rlang_1.1.1 AnnotationDbi_1.60.2
## [55] munsell_0.5.1 cellranger_1.1.0
## [57] tools_4.2.0 cachem_1.0.8
## [59] cli_3.6.3 generics_0.1.3
## [61] RSQLite_2.3.6 broom_1.0.5
## [63] evaluate_0.23 fastmap_1.1.1
## [65] yaml_2.3.5 rematch2_2.1.2
## [67] knitr_1.40 bit64_4.0.5
## [69] fs_1.6.4 KEGGREST_1.38.0
## [71] nlme_3.1-164 xml2_1.3.6
## [73] compiler_4.2.0 rstudioapi_0.13
## [75] filelock_1.0.3 curl_5.2.1
## [77] susieR_0.12.35 png_0.1-8
## [79] ggsignif_0.6.4 reprex_2.1.0
## [81] bslib_0.3.1 stringi_1.7.6
## [83] highr_0.9 lattice_0.22-6
## [85] Matrix_1.6-5 vctrs_0.6.5
## [87] pillar_1.9.0 lifecycle_1.0.3
## [89] jquerylib_0.1.4 bitops_1.0-7
## [91] irlba_2.3.5.1 rtracklayer_1.56.1
## [93] R6_2.5.1 BiocIO_1.6.0
## [95] bookdown_0.29 gridExtra_2.3
## [97] codetools_0.2-20 assertthat_0.2.1
## [99] chron_2.3-59 SummarizedExperiment_1.26.1
## [101] rprojroot_2.0.3 rjson_0.2.21
## [103] withr_2.5.0 GenomicAlignments_1.32.1
## [105] Rsamtools_2.14.0 GenomeInfoDbData_1.2.9
## [107] mgcv_1.9-1 parallel_4.2.0
## [109] hms_1.1.2 grid_4.2.0
## [111] rmarkdown_2.14 carData_3.0-5
## [113] MatrixGenerics_1.8.1 googledrive_2.1.0
## [115] mixsqp_0.3-54 Biobase_2.56.0
## [117] lubridate_1.8.0 restfulr_0.0.15